Load the data set from Herbst et al. (2021).
load("multiomics_MAE.RData")
prot <- multiomics_MAE[["proteomics"]]
dim(prot)## [1] 7311 68
vsn::meanSdPlot(prot)Run the correlation analysis:
Only calculate the values for correlation with
TBX21.
cor_pearson <- lapply(seq_len(nrow(prot)), function(i) {
psych::corr.test(x = prot["TBX21", ], y = prot[i, ], method = "pearson",
adjust = "BH")
})
cor_spearman <- lapply(seq_len(nrow(prot)), function(i) {
psych::corr.test(x = prot["TBX21", ], y = prot[i, ], method = "spearman",
adjust = "BH")
})## create vectors with coefficient values, p-values, adjusted p-values
cor_pearson_coef <- unlist(lapply(cor_pearson, function(i) i[["r"]]))
names(cor_pearson_coef) <- rownames(prot)
cor_pearson_p <- unlist(lapply(cor_pearson, function(i) i[["p"]]))
names(cor_pearson_p) <- rownames(prot)
cor_pearson_padj <- unlist(lapply(cor_pearson, function(i) i[["p.adj"]]))
names(cor_pearson_padj) <- rownames(prot)
cor_spearman_coef <- unlist(lapply(cor_spearman, function(i) i[["r"]]))
names(cor_spearman_coef) <- rownames(prot)
cor_spearman_p <- unlist(lapply(cor_spearman, function(i) i[["p"]]))
names(cor_spearman_p) <- rownames(prot)
cor_spearman_padj <- unlist(lapply(cor_spearman, function(i) i[["p.adj"]]))
names(cor_spearman_padj) <- rownames(prot)
## bind to a data.frame
df_corr <- data.frame(
protein = names(cor_pearson_coef),
pearson_coef = cor_pearson_coef,
pearson_p = cor_pearson_p,
pearson_padj = cor_pearson_padj,
spearman_coef = cor_spearman_coef,
spearman_p = cor_spearman_p,
spearman_padj = cor_spearman_padj
)
## show the correlating features and write the values to a file
rmarkdown::paged_table(df_corr[order(abs(df_corr$pearson_coef), decreasing=TRUE), ])write.table(df_corr, file = "correlation_coefficient_pvalue_TB21.txt",
sep = "\t", quote = FALSE, row.names = FALSE)It was mentioned by Philipp, that there is a potential regulation towards RUNX3 and IRF9. What are the correlation coefficients with those?
According to https://www.genome.jp/entry/hsa:30009,
TBX21 is located in the following pathway maps:
hsa04658: Th1 and Th2 cell differentiationhsa04659: Th17 cell differentiationhsa05321: Inflammatory bowel diseaseVisualize these pathways together with their correlation coefficients.
coef <- matrix(c(df_corr$pearson_coef, df_corr$spearman_coef),
byrow = TRUE, nrow = 2)
colnames(coef) <- df_corr$protein
rownames(coef) <- c("pearson", "spearman")
pathview(gene.data = t(coef), gene.idtype = "symbol", pathway.id = "hsa04658",
same.layer = TRUE, species = "hsa", out.suffix = "TBX_pathway")## [1] "Note: 177 of 7311 unique input IDs unmapped."
pathview(gene.data = t(coef), gene.idtype = "symbol", pathway.id = "hsa04659",
same.layer = TRUE, species = "hsa", out.suffix = "TBX_pathway")## [1] "Note: 177 of 7311 unique input IDs unmapped."
pathview(gene.data = t(coef), gene.idtype = "symbol", pathway.id = "hsa05321",
same.layer = TRUE, species = "hsa", out.suffix = "TBX_pathway")## [1] "Note: 177 of 7311 unique input IDs unmapped."
Split the data set up in 0-25%-quantile group and 75-100% quantile group and perform differential abundance analysis between these two groups.
qu <- quantile(prot["TBX21", ])
qu_025 <- prot["TBX21", ] < qu["25%"]
qu_2550 <- prot["TBX21", ] >= qu["25%"] & prot["TBX21", ] < qu["75%"]
qu_75100 <- prot["TBX21", ] >= qu["75%"]
tbx21_groups <- character(length(prot["TBX21", ]))
tbx21_groups <- ifelse(qu_025, "lower", tbx21_groups)
tbx21_groups <- ifelse(qu_2550, "middle", tbx21_groups)
tbx21_groups <- ifelse(qu_75100, "upper", tbx21_groups)
## how does this grouping agree with ASB-CLL?
pg <- read.table("PGs.txt", sep = "\t", header = TRUE)
pg$PG_ASB_CLL <- ifelse(pg$PG == "ASB-CLL", "ASB-CLL", "other")
all(names(tbx21_groups) == pg$patient_ID_CLL)## [1] TRUE
table(tbx21_groups, pg$PG)##
## tbx21_groups ASB-CLL M-PG TP53-PG Tris12M-PG Tris12U-PG U-PG
## lower 5 2 3 2 1 4
## middle 5 10 1 4 2 12
## upper 2 6 0 3 5 1
table(tbx21_groups, pg$PG_ASB_CLL)##
## tbx21_groups ASB-CLL other
## lower 5 12
## middle 5 29
## upper 2 15
Perform now the differential abundance analysis between
qu_025 group and qu_75100 group. The ratios
were previously log2 transformed - transform them by taking
2^(log2(ratio)).
all(colnames(prot) == pg$patient_ID_CLL)## [1] TRUE
cD <- data.frame(sample = colnames(prot), qu = as.character(tbx21_groups))
design <- model.matrix(~ 0 + qu, data = cD)
colnames(design) <- make.names(colnames(design))
fit <- lmFit(object = 2^prot, design = design, method = "ls")
## create contrasts
contrasts <- makeContrasts(
lower_vs_upper = qulower - quupper,
levels = design)
fit_c <- contrasts.fit(fit, contrasts)
fit_eB <- eBayes(fit_c)
## set parameters for differential expression
num <- Inf
p_val <- 1
adj <- "BH"Return the log FC and p-values for differentially abundant proteins.
tT <- topTable(fit_eB, number = num, p.value = p_val, adjust.method = adj,
coef = "lower_vs_upper")
rmarkdown::paged_table(tT)## show for RUNX3 and IRF9
rmarkdown::paged_table(tT[c("RUNX3", "IRF9"), ])Print here the package versions used in this analysis.
sessionInfo()## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 19044)
##
## Matrix products: default
##
## locale:
## [1] LC_COLLATE=English_Germany.utf8 LC_CTYPE=English_Germany.utf8
## [3] LC_MONETARY=English_Germany.utf8 LC_NUMERIC=C
## [5] LC_TIME=English_Germany.utf8
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] MultiAssayExperiment_1.24.0 limma_3.54.1
## [3] multiGSEA_1.8.0 MatrixQCvis_1.7.3
## [5] shiny_1.7.4 plotly_4.10.1
## [7] SummarizedExperiment_1.28.0 Biobase_2.58.0
## [9] GenomicRanges_1.50.2 GenomeInfoDb_1.34.7
## [11] IRanges_2.32.0 S4Vectors_0.36.1
## [13] BiocGenerics_0.44.0 MatrixGenerics_1.10.0
## [15] matrixStats_0.63.0 pathview_1.38.0
## [17] forcats_1.0.0 stringr_1.5.0
## [19] dplyr_1.1.0 purrr_1.0.1
## [21] readr_2.1.3 tidyr_1.3.0
## [23] tibble_3.1.8 tidyverse_1.3.2
## [25] ggplot2_3.4.0 knitr_1.42
##
## loaded via a namespace (and not attached):
## [1] rappdirs_0.3.3 bit64_4.0.5 multcomp_1.4-20
## [4] DelayedArray_0.24.0 data.table_1.14.6 rpart_4.1.19
## [7] KEGGREST_1.38.0 RCurl_1.98-1.10 doParallel_1.0.17
## [10] generics_0.1.3 metap_1.8 snow_0.4-4
## [13] preprocessCore_1.60.2 cowplot_1.1.1 TH.data_1.1-1
## [16] RSQLite_2.2.20 bit_4.0.5 tzdb_0.3.0
## [19] mutoss_0.1-12 xml2_1.3.3 lubridate_1.9.1
## [22] httpuv_1.6.8 assertthat_0.2.1 gargle_1.3.0
## [25] xfun_0.37 hms_1.1.2 jquerylib_0.1.4
## [28] evaluate_0.20 promises_1.2.0.1 fansi_1.0.4
## [31] dbplyr_2.3.0 readxl_1.4.1 Rgraphviz_2.42.0
## [34] DBI_1.1.3 htmlwidgets_1.6.1 googledrive_2.0.0
## [37] ellipsis_0.3.2 RSpectra_0.16-1 crosstalk_1.2.0
## [40] backports_1.4.1 deldir_1.0-6 vctrs_0.5.2
## [43] imputeLCMD_2.1 cachem_1.0.6 withr_2.5.0
## [46] checkmate_2.1.0 mnormt_2.1.1 cluster_2.1.4
## [49] lazyeval_0.2.2 crayon_1.5.2 pkgconfig_2.0.3
## [52] labeling_0.4.2 qqconf_1.3.1 nlme_3.1-160
## [55] nnet_7.3-18 rlang_1.0.6 lifecycle_1.0.3
## [58] sandwich_3.0-2 affyio_1.68.0 mathjaxr_1.6-0
## [61] modelr_0.1.10 cellranger_1.1.0 graph_1.76.0
## [64] Matrix_1.5-3 zoo_1.8-11 reprex_2.0.2
## [67] base64enc_0.1-3 GlobalOptions_0.1.2 googlesheets4_1.0.1
## [70] png_0.1-8 viridisLite_0.4.1 rjson_0.2.21
## [73] bitops_1.0-7 shinydashboard_0.7.2 Biostrings_2.66.0
## [76] blob_1.2.3 shape_1.4.6 jpeg_0.1-10
## [79] tmvtnorm_1.5 scales_1.2.1 memoise_2.0.1
## [82] graphite_1.44.0 magrittr_2.0.3 plyr_1.8.8
## [85] hexbin_1.28.2 zlibbioc_1.44.0 compiler_4.2.2
## [88] RColorBrewer_1.1-3 plotrix_3.8-2 pcaMethods_1.90.0
## [91] clue_0.3-64 KEGGgraph_1.58.3 cli_3.6.0
## [94] affy_1.76.0 XVector_0.38.0 htmlTable_2.4.1
## [97] Formula_1.2-4 MASS_7.3-58.1 tidyselect_1.2.0
## [100] vsn_3.66.0 stringi_1.7.12 highr_0.10
## [103] yaml_2.3.7 proDA_1.12.0 askpass_1.1
## [106] norm_1.0-10.0 latticeExtra_0.6-30 grid_4.2.2
## [109] sass_0.4.5 fastmatch_1.1-3 tools_4.2.2
## [112] timechange_0.2.0 parallel_4.2.2 circlize_0.4.15
## [115] rstudioapi_0.14 foreach_1.5.2 foreign_0.8-83
## [118] gridExtra_2.3 farver_2.1.1 Rtsne_0.16
## [121] digest_0.6.31 BiocManager_1.30.19 Rcpp_1.0.10
## [124] broom_1.0.3 shinyhelper_0.3.2 later_1.3.0
## [127] org.Hs.eg.db_3.16.0 httr_1.4.4 AnnotationDbi_1.60.0
## [130] ComplexHeatmap_2.14.0 psych_2.2.9 Rdpack_2.4
## [133] colorspace_2.1-0 rvest_1.0.3 XML_3.99-0.13
## [136] fs_1.6.0 reticulate_1.28 umap_0.2.9.0
## [139] splines_4.2.2 sn_2.1.0 multtest_2.54.0
## [142] gmm_1.7 xtable_1.8-4 jsonlite_1.8.4
## [145] UpSetR_1.4.0 R6_2.5.1 TFisher_0.2.0
## [148] Hmisc_4.7-2 pillar_1.8.1 htmltools_0.5.4
## [151] mime_0.12 glue_1.6.2 fastmap_1.1.0
## [154] BiocParallel_1.32.5 codetools_0.2-18 fgsea_1.24.0
## [157] mvtnorm_1.1-3 utf8_1.2.3 lattice_0.20-45
## [160] bslib_0.4.2 numDeriv_2016.8-1.1 curl_5.0.0
## [163] shinyjs_2.1.0 openssl_2.0.5 interp_1.1-3
## [166] survival_3.4-0 rmarkdown_2.20 munsell_0.5.0
## [169] GetoptLong_1.0.5 GenomeInfoDbData_1.2.9 iterators_1.0.14
## [172] impute_1.72.3 haven_2.5.1 gtable_0.3.1
## [175] rbibutils_2.2.13
European Molecular Biology Laboratory, Meyerhofstrasse 1, 69117 Heidelberg, Germany↩︎